//User-written commands to be installed
// distinct
// did_imputation
// ppmlhdfe

//load dataset
// Start from an empty session and read the citation panel from the current working directory.
clear all
cd "`c(pwd)'"
// Forward slashes are understood by Stata on Windows, macOS and Linux alike;
// the former ".\" prefix only resolved on Windows.
import delimited "./citation_panel.csv", clear encoding("utf8")


///////////////////////////////////
//CONTENT:
//        - 1: VARIABLE DEFINITIONS AND INITIAL SAMPLE SELECTION
//				- 1.1: Treatment variables
//				- 1.2: Other variables needed for analysis
//				- 1.3: Initial Sample Selection
//				- 1.4: Dependent variables
//		  - 2: OLS ANALYSIS
//				- 2.1: Main Event study graph
//				- 2.2: Pre vs Post results for main dependent variable, but different subsamples
//				- 2.3: Citations by gender or social distance
//				- 2.4: Citations by gender and social distance
//				- 2.5: Robustness: Anticipatory Effects when shifting outcome_year 1, 2 or 3 years forward
//				- 2.6: Robustness: Incident Year as Treatment Date
//				- 2.7: Robustness: Stayers vs leavers, Perps who continue publishing vs those who stop
//				- 2.8: Media analysis: All results that make use of newspaper information
//				- 2.9: Robustness: Event graph with citation pre-trend matching
//              - 2.10: Robustness: Truncation
//		  - 3: POISSON ANALYSIS
//				- 3.1: Main estimate, citations by male first authors, citations by female first authors


//////////////////////////
//1.1 Treatment variables
// The treatment year is taken directly from outcome_year.
generate treatment_year = outcome_year
// Event time: citation year relative to the treatment year.
generate year_rel_treatment = citation_year - treatment_year
// Post-treatment indicator; can only be non-zero for treated articles.
generate treatment_post = treated * (citation_year >= treatment_year)
// Treatment date passed to did_imputation below: treatment_year*treated,
// set to missing for controls (treated==0).
generate treatment_timing = cond(treated == 0, ., treatment_year * treated)

//////////////////////////////
//1.2 Other variables needed for analysis
//Generate numeric identifiers
	//articles, samples and sources use egen group() (instead of 'encode' because of the number of distinct articles); incidents use encode
	//ids are stored as long so that large integer ids are held exactly
egen long article_id_num = group(eid)
egen long sample_id_num = group(sample)
egen long source_id_num = group(source_id)
encode incident_id, gen(incident_id_num)

// Panel unit used throughout the analysis: one id per article x incident pair.
egen long idname=group(article_id_num incident_id_num)

//Generate other useful variables
gen field_2digit=int(source_asjc/100)
replace field_2digit=0 if field_2digit==. //assign journals without field to residual category
egen earliest_pub_year=min(pub_year), by(incident_id_num)  // earliest publication year within the incident
egen latest_pub_year=max(pub_year), by(incident_id_num)    // latest publication year within the incident

//clustering: rows with treated==0 are clustered by article, all other rows by incident.
// The cluster id is built with group() on (control flag, unit id) so that a control
// article can never share a cluster id with an incident. The previous construction
// (incident group id vs. article_id_num*1000) overlaps as soon as there are 1000 or
// more incidents. Only the id labels change; the partition into clusters is the same
// whenever the old ids did not collide.
tempvar cl_is_control cl_unit
gen byte `cl_is_control' = (treated==0)
gen long `cl_unit' = cond(`cl_is_control', article_id_num, incident_id_num)
egen long cl = group(`cl_is_control' `cl_unit')
drop `cl_is_control' `cl_unit'

//create publication decile variable
// NOTE(review): source_bin==10 is mapped to 0 but 20, 30, ... keep floor(source_bin/10);
// confirm the coding of source_bin. source_decile is not used in the visible part of this file.
gen source_decile=floor(source_bin/10)
replace source_decile=0 if source_bin==10
//Old vs new articles: new = at most 5 years between publication and treatment year
gen new_article=treatment_year-pub_year<=5

//Top journals
gen top10=source_bin<=10
//male dominated fields
// NOTE(review): a missing share_asjc_male_dom compares as >=0.5 in Stata and would be coded 1;
// the subsample filters below exclude share_asjc_male_dom==-1, which suggests -1 is the
// "unknown" code - confirm there are no true missings.
gen maj_male_dom=share_asjc_male_dom>=0.5 //Main definition adopted in the paper

//newspaper media: years between outcome year and earliest LexisNexis year
gen reporting_delay=earliest_lexisnexis_year-outcome_year

////////////////////////////////
//1.3 Subset data (initial sample selection)
// Keep only rows whose article was published strictly before the treatment year.
keep if pub_year<treatment_year

// Drop duplicates
// dupe_article counts how many OTHER rows share the same article x citation year (0 = unique).
duplicates tag article_id_num citation_year, gen(dupe_article)
// Flag treated rows that are duplicated: 1 for them, missing for every other row.
gen treated_dupe_article=treated*(dupe_article>0)
replace treated_dupe_article=. if treated_dupe_article==0

// One random draw per idname (article x incident): draw per row, then copy the first
// row's draw to all rows of the same idname.
// NOTE(review): "bys idname:" does not fix the row order within idname, so which draw
// ends up as r_nr[1] depends on how sort breaks ties - confirm the result is reproducible
// across runs (e.g. by also fixing the sort order or sortseed).
set seed 42
gen r_nr=rnormal()
bys idname: replace r_nr=r_nr[1]

// The draw is kept only for treated duplicate rows (missing elsewhere); within each
// article the smallest draw wins and every row with a larger value is marked.
gen r_treated_dupe_article=r_nr*treated_dupe_article
egen min_r_treated_dupe_article=min(r_treated_dupe_article), by(article_id_num)
// NOTE(review): missing compares as larger than any number, so rows of such an article
// whose r_treated_dupe_article is missing are marked as well - confirm this is intended.
gen to_be_dropped=min_r_treated_dupe_article<r_treated_dupe_article
// A sample is removed entirely as soon as one of its rows is marked.
egen sample_to_be_dropped=max(to_be_dropped), by(sample_id_num)

drop if sample_to_be_dropped==1

// Remove the helper variables created above.
drop dupe_article
drop treated_dupe_article
drop r_nr
drop r_treated_dupe_article
drop min_r_treated_dupe_article
drop to_be_dropped
drop sample_to_be_dropped

////////////////////////////////
//1.4 Dependent variables
// All outcomes are inverse-hyperbolic-sine transforms of citation counts.
generate arsinh_cit_count = asinh(cit_count)

//Counts by gender of the citing first author
foreach g in female male {
    generate arsinh_cit_count_`g'_fa = asinh(cit_count_`g'_first_author)
}

//Social Distance
//Note: Distance 3_inf includes citations by researchers not in network
// Three families are built in turn: all citations, citations by male first authors,
// citations by female first authors. For each family, distance 3_inf is the family
// total minus the distance-1 and distance-2 counts.
foreach grp in all male female {
    if "`grp'" == "all" {
        local tag ""
        local total cit_count
    }
    else {
        local tag "`grp'_"
        local total cit_count_`grp'_first_author
    }
    forvalues d = 1/2 {
        generate ah_cites_`tag'soc_dist_`d' = asinh(cit_count_`tag'soc_dist_`d')
    }
    generate cit_count_`tag'soc_dist_3_inf = `total'-(cit_count_`tag'soc_dist_1+cit_count_`tag'soc_dist_2)
    generate ah_cites_`tag'soc_dist_3_inf = asinh(cit_count_`tag'soc_dist_3_inf)
}

// Store the analysis dataset.
save "./citation_dataset.dta", replace

////////////////////////////////////////////
//2 - OLS ANALYSIS


/////////////////
//2.1 Main Event study graph
//Main result Event-study graph

// Descriptive statistics written next to the estimates: mean citation count,
// number of rows, number of distinct articles and number of distinct incidents.
file open myfile using "did_leads_and_lags_dep_mean.txt", write replace
file write myfile "cit_mean,n_obs,d_articles,d_incidents"_n
sum cit_count,detail
file write myfile (r(mean)) ","
distinct article_id_num
file write myfile (r(N)) "," (r(ndistinct)) ","
distinct incident_id_num
file write myfile (r(ndistinct)) _n
file close myfile

// Event study with 4 pre-trend coefficients and horizons 0 to 4.
eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing , fe(idname sample_id_num##citation_year) autosample cluster(cl) pretrends(4) horizons(0/4)

// Joint pre-trend p-value, taken right after estimation and formatted with a fixed
// three-decimal format. The previous round()+"=0" prefix produced "=01" for p=1,
// "=00" for p<0.0005 and dropped trailing zeros.
local pre_p : display %5.3f e(pre_p)

esttab using did_leads_and_lags_all.csv, se replace

// Make sure the target folder exists (errors from already existing folders are ignored)
// and use forward slashes so the path also resolves outside Windows.
capture mkdir "output"
capture mkdir "output/numbers"
file open p_file using "./output/numbers/MainPretrendJointP.txt", write replace
file write p_file "=`pre_p'"
file close p_file

eststo clear


/////////////////
//2.2 Pre vs Post results, using the main dependent variable, but for different samples
// A. Main effect estimate of the paper
// B. Effect estimate for subsample of articles in Top 10% journals
// C. Old articles
// D. Articles in Male dominated fields
// E. Articles in Not-male dominated fields

//A. Full sample
eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing, ///
    fe(idname sample_id_num##citation_year) autosample cluster(cl)

// Start the descriptive-statistics file (header plus the row for the main specification).
file open myfile using "did_pre_post.txt", write replace
file write myfile "spec,cit_mean,n_obs,d_articles,d_incidents"_n
summarize cit_count, detail
file write myfile "main," (r(mean)) ","
distinct article_id_num
file write myfile (r(N)) "," (r(ndistinct)) ","
distinct incident_id_num
file write myfile (r(ndistinct)) _n
file close myfile
//Two-sided test for equality of coefficients male dominated vs. not male dominated
// The test uses the same rows as subsamples D and E of this section, i.e. it excludes
// source_asjc==1000 and share_asjc_male_dom==-1. Without the restriction, rows with
// share_asjc_male_dom==-1 are counted as "not male dominated" (maj_male_dom==0) in the
// test although they belong to neither subsample estimate.
did_imputation arsinh_cit_count idname citation_year treatment_timing if source_asjc!=1000 & share_asjc_male_dom!=-1, fe(idname sample_id_num##citation_year) autosample cluster(cl) hetby(maj_male_dom)
test tau_0=tau_1
// Fixed three-decimal format; the previous round()+"=0" prefix produced "=01" for p=1,
// "=00" for p<0.0005 and dropped trailing zeros.
local p_val : display %5.3f r(p)
// Create the target folder if needed and use forward slashes so the path is portable.
capture mkdir "output"
capture mkdir "output/numbers"
file open p_file using "./output/numbers/Coef_Test_Male_dom_Est_P.txt", write replace
file write p_file "=`p_val'"
file close p_file

//B.-E. Subsample estimates
// Each specification restricts the data, re-estimates the pre/post effect and appends
// one row of descriptive statistics to did_pre_post.txt. The data are restored afterwards.
//   top          = B. articles in top journals (source_bin<=10)
//   old          = C. old articles (new_article==0)
//   male_dom     = D. male dominated fields
//   non_male_dom = E. not male dominated fields
local keep_top          "source_bin<=10"
local keep_old          "new_article==0"
local keep_male_dom     "maj_male_dom==1 & source_asjc!=1000 & share_asjc_male_dom!=-1"
local keep_non_male_dom "maj_male_dom==0 & source_asjc!=1000 & share_asjc_male_dom!=-1"

foreach spec in top old male_dom non_male_dom {
    preserve
    keep if `keep_`spec''

    eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing, ///
        fe(idname sample_id_num##citation_year) autosample cluster(cl)

    file open myfile using "did_pre_post.txt", write append
    summarize cit_count, detail
    file write myfile "`spec'," (r(mean)) ","
    distinct article_id_num
    file write myfile (r(N)) "," (r(ndistinct)) ","
    distinct incident_id_num
    file write myfile (r(ndistinct)) _n
    file close myfile
    restore
}

//output to file
esttab using did_pre_post.csv, se replace
eststo clear


/////////////////
//2.3 Citations by gender or social distance (Table 1)
//
// Table 1A. citations by male authors
// Table 1B. citations by male authors in male-dominated fields (main definition used in the paper)
// Table 1C. citations by male authors in not-male-dominated fields (main definition used in the paper)
// Table 1D. citations by female authors
// Table 1E. citations by female authors in male-dominated fields (main definition used in the paper)
// Table 1F. citations by female authors in not-male-dominated fields (main definition used in the paper)
// Table 1G. citations by coauthors distance=1
// Table 1H. citations by coauthors distance=2
// Table 1I. citations by coauthors distance>=3

// For every specification the estimate is stored with eststo and the mean of the
// untransformed count is written to did_dep_vars.txt.
local did_opts "fe(idname sample_id_num##citation_year) autosample cluster(cl)"
local cond_male_dom     "maj_male_dom==1 & source_asjc!=1000 & share_asjc_male_dom!=-1"
local cond_not_male_dom "maj_male_dom==0 & source_asjc!=1000 & share_asjc_male_dom!=-1"

file open myfile using "did_dep_vars.txt", write replace
file write myfile "spec,cit_mean"_n

//1A.-1F. citations by gender of the citing first author: full sample first,
// then male-dominated and not-male-dominated fields
foreach g in male female {
    eststo: quietly did_imputation arsinh_cit_count_`g'_fa idname citation_year treatment_timing, `did_opts'
    sum cit_count_`g'_first_author,detail
    file write myfile "`g'_cit," (r(mean)) _n

    foreach f in male_dom not_male_dom {
        eststo: quietly did_imputation arsinh_cit_count_`g'_fa idname citation_year treatment_timing if `cond_`f'', `did_opts'
        sum cit_count_`g'_first_author if `cond_`f'',detail
        file write myfile "`g'_cit_`f'," (r(mean)) _n
    }
}

//1G.-1I. citations by social distance
// The mean for distance>=3 is taken from cit_count_soc_dist_3_inf, the count underlying
// the dependent variable ah_cites_soc_dist_3_inf. The previous code summarised
// "cit_count_soc_dist_3", which is either a different variable or only works through
// variable-name abbreviation.
local k = 0
foreach d in 1 2 3_inf {
    local ++k
    local lab : word `k' of one two three
    eststo: quietly did_imputation ah_cites_soc_dist_`d' idname citation_year treatment_timing, `did_opts'
    sum cit_count_soc_dist_`d',detail
    file write myfile "dist_`lab'," (r(mean)) _n
}

file close myfile

//output to file
esttab using did_dep_vars.csv, se replace
eststo clear


/////////////////
//2.4 Citations by gender and social distance (Table 2)
//
// Table 2A. citations by male authors at distance=1
// Table 2B. citations by male authors at distance=2
// Table 2C. citations by male authors at distance>=3
// Table 2D. citations by female authors at distance=1
// Table 2E. citations by female authors at distance=2
// Table 2F. citations by female authors at distance>=3

file open myfile using "did_soc_dist_gender.txt", write replace
file write myfile "spec,cit_mean"_n

// Outer loop: gender of the citing first author (2A-2C male, 2D-2F female).
// Inner loop: social distance 1, 2 and >=3; the mean of the raw count is logged per cell.
foreach g in male female {
    local k = 0
    foreach d in 1 2 3_inf {
        local ++k
        local lab : word `k' of one two three
        eststo: quietly did_imputation ah_cites_`g'_soc_dist_`d' idname citation_year treatment_timing, ///
            fe(idname sample_id_num##citation_year) autosample cluster(cl)
        summarize cit_count_`g'_soc_dist_`d', detail
        file write myfile "dist_`lab'_`g'_cit," (r(mean)) _n
    }
}

file close myfile

//output to file
esttab using did_soc_dist_gender.csv, se replace
eststo clear


/////////////////
//2.5 Robustness: Anticipatory effects when shifting the outcome-year 1, 2 or 3 years forward
// Same specification as the main estimate, run once per value of shift().
forvalues yrs = 1/3 {
    eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing, ///
        fe(idname sample_id_num##citation_year) autosample cluster(cl) shift(`yrs')
}

//output to file
esttab using did_pre_post_anticip.csv, se replace
eststo clear


/////////////////
//2.6 Robustness: Using the incident year instead of the outcome_year as treatment date
// Treatment variables rebuilt on incident_year; these stay in the data after this section.
generate year_rel_incident = citation_year - incident_year
generate incident_post = treated * (citation_year >= incident_year)
// incident_year*treated, missing for controls (treated==0)
generate incident_timing = cond(treated == 0, ., incident_year * treated)

preserve
// Rows need a known incident year and an article published before it.
keep if incident_year != . & pub_year < incident_year

file open myfile using "did_pre_post_incident.txt", write replace
file write myfile "spec,cit_mean,n_obs,d_articles,d_incidents"_n
summarize cit_count, detail
file write myfile "incident," (r(mean)) ","
distinct article_id_num
file write myfile (r(N)) "," (r(ndistinct)) ","
distinct incident_id_num
file write myfile (r(ndistinct)) _n
file close myfile

eststo: quietly did_imputation arsinh_cit_count idname citation_year incident_timing, ///
    fe(idname sample_id_num##citation_year) autosample cluster(cl) pretrends(10)
restore

//output to file
esttab using did_pre_post_incident.csv, se replace
eststo clear


////////////////////////
//2.7 Robustness: Stayers vs leavers and Perpetrators who continue to publish vs those who stop

// A. Effect for incidents where the perpetrator is a "stayer"
// B. Effect for incidents where the perpetrator is a "leaver"
// C. Effect for incidents where the perpetrator continues publishing
// D. Effect for incidents where the perpetrator stops publishing

// Sample restriction per specification (A-D in this order).
local cond_stayers   "outc_leaver==0"
local cond_leavers   "outc_leaver==1"
local cond_cont_publ "outcome_year<=2018 & latest_pub_year-outcome_year>=2"
local cond_stop_publ "outcome_year<=2018 & latest_pub_year-outcome_year<2"

// The first specification creates did_stayers_leavers.txt and writes the header,
// the remaining ones append to it.
local mode replace
foreach spec in stayers leavers cont_publ stop_publ {
    preserve
    keep if `cond_`spec''

    eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing, ///
        fe(idname sample_id_num##citation_year) autosample cluster(cl)

    file open myfile using "did_stayers_leavers.txt", write `mode'
    if "`mode'" == "replace" {
        file write myfile "spec,cit_mean,n_obs,d_articles,d_incidents"_n
    }
    local mode append
    summarize cit_count, detail
    file write myfile "`spec'," (r(mean)) ","
    distinct article_id_num
    file write myfile (r(N)) "," (r(ndistinct)) ","
    distinct incident_id_num
    file write myfile (r(ndistinct)) _n
    file close myfile

    restore
}

//output to file
esttab using did_stayers_leavers.csv, se replace
eststo clear

// Parameter tests
// p-values are written with a fixed three-decimal format; the previous round()+"=0"
// prefix produced "=01" for p=1, "=00" for p<0.0005 and dropped trailing zeros.
// The target folder is created if needed and addressed with forward slashes so the
// paths also resolve outside Windows.
capture mkdir "output"
capture mkdir "output/numbers"

// Stayers vs. leavers
did_imputation arsinh_cit_count idname citation_year treatment_timing, fe(idname sample_id_num##citation_year) autosample cluster(cl) hetby(outc_leaver)
test tau_0=tau_1
local p_val : display %5.3f r(p)
file open p_file using "./output/numbers/Coef_Test_leaver_Est_P.txt", write replace
file write p_file "=`p_val'"
file close p_file

// Continue vs. stop publishing
// outc_cont is 1 when the latest publication year is at least 2 years after the outcome year, 0 otherwise.
gen byte outc_cont=(latest_pub_year-outcome_year>=2)
// The test is restricted to outcome_year<=2018, the same restriction the subsample
// estimates C and D of this section apply; without it, later incidents enter the test
// although they are in neither subsample.
did_imputation arsinh_cit_count idname citation_year treatment_timing if outcome_year<=2018, fe(idname sample_id_num##citation_year) autosample cluster(cl) hetby(outc_cont)
test tau_0=tau_1
local p_val : display %5.3f r(p)
file open p_file using "./output/numbers/Coef_Test_cont_Est_P.txt", write replace
file write p_file "=`p_val'"
file close p_file
drop outc_cont

//////////////////////////////////////////////
//2.8 Media analysis
//
// A. Effect for incidents in big newspapers
// B. Effect for incidents in small newspapers
// C. Effect when there is no reporting delay, i.e. earliest_lexisnexis_year<=outcome_year (not used in paper)
// D. Effect when there is reporting delay, i.e. earliest_lexisnexis_year>outcome_year (not used in paper)
// E. Robustness: Using the minimum of earliest newspaper year and outcome year as treatment date

//A.-D. All four use only rows with a non-missing earliest_lexisnexis_year plus one extra condition.
local cond_big_newspaper   "big_newspaper==1"
local cond_small_newspaper "big_newspaper==0"
local cond_no_rep_delay    "reporting_delay<=0"
local cond_rep_delay       "reporting_delay>0"

// The first specification creates did_media.txt with its header, later ones append.
local mode replace
foreach spec in big_newspaper small_newspaper no_rep_delay rep_delay {
    preserve
    keep if earliest_lexisnexis_year != . & (`cond_`spec'')

    eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing, ///
        fe(idname sample_id_num##citation_year) autosample cluster(cl)

    file open myfile using "did_media.txt", write `mode'
    if "`mode'" == "replace" {
        file write myfile "spec,cit_mean,n_obs,d_articles,d_incidents"_n
    }
    local mode append
    summarize cit_count, detail
    file write myfile "`spec'," (r(mean)) ","
    distinct article_id_num
    file write myfile (r(N)) "," (r(ndistinct)) ","
    distinct incident_id_num
    file write myfile (r(ndistinct)) _n
    file close myfile
    restore
}

//E.
preserve

// Treatment date = the earlier of outcome year and earliest newspaper year
// (min() skips a missing argument, so a missing newspaper year leaves the outcome year).
generate t_date = min(outcome_year, earliest_lexisnexis_year)
// t_date*treated, missing for controls (treated==0)
generate t_timing = cond(treated == 0, ., t_date * treated)

keep if pub_year < t_date

eststo: quietly did_imputation arsinh_cit_count idname citation_year t_timing, ///
    fe(idname sample_id_num##citation_year) autosample cluster(cl)

file open myfile using "did_media.txt", write append
summarize cit_count, detail
file write myfile "anticip_earliest_n_p," (r(mean)) ","
distinct article_id_num
file write myfile (r(N)) "," (r(ndistinct)) ","
distinct incident_id_num
file write myfile (r(ndistinct)) _n
file close myfile

drop t_date t_timing

restore

//output to file

esttab using did_media.csv, se replace
eststo clear


//////////////////////////////////
//2.9 Robustness: Event graph with citation pre-trend matching
// Within each sample, rows are ranked by how closely their pre-period outcome path
// follows the treated path; only the top-ranked rows per sample x treated status are
// kept before re-estimating. Everything happens inside preserve/restore.
preserve

// Number of pre-period rows (citation_year<outcome_year) per panel unit.
gen pre_period=(citation_year<outcome_year)
egen num_pre_periods=sum(pre_period), by(idname)

// Outcome in pre-period rows, 0 in all other rows; the treated-only copy is missing for controls.
gen outcome_pre=arsinh_cit_count*(citation_year<outcome_year)
gen outcome_pre_only_treated=outcome_pre*treated
replace outcome_pre_only_treated=. if treated==0

// Treated reference value per sample x citation year (the minimum if there are several),
// and the squared gap of every row to that reference.
egen outcome_pre_group_treated=min(outcome_pre_only_treated), by(sample_id_num citation_year)
gen outcome_pre_diff_sqr=(outcome_pre-outcome_pre_group_treated)^2

// Average squared gap per panel unit: sum over its rows divided by its number of pre-period rows.
egen sum_diff_sqr=sum(outcome_pre_diff_sqr), by(idname)
gen avg_diff_sqr=sum_diff_sqr/num_pre_periods

// Field ranking gives rank 1 to the highest value, so ranking -avg_diff_sqr puts the
// smallest average gap first within sample x treated status.
egen rank=rank(-avg_diff_sqr), field by(sample_id_num treated)
// Second ranking among rows with the same first rank: the smallest sample_article_num comes first.
egen rank_2=rank(-sample_article_num), field by(sample_id_num treated rank)
// Largest average gap among the selected rows of each sample (not used further in this section).
egen avg_diff_sqr_group=max((rank==1)*(rank_2==1)*avg_diff_sqr), by(sample_id_num)

keep if rank==1 & rank_2==1

// Descriptive statistics of the matched data.
file open myfile using "did_leads_and_lags_dep_mean_cit_matched.txt", write replace
file write myfile "spec,cit_mean,n_obs,d_articles,d_incidents"_n
sum cit_count,detail
file write myfile "all," (r(mean)) ","
distinct article_id_num
file write myfile (r(N)) ","
file write myfile (r(ndistinct)) ","
distinct incident_id_num
file write myfile (r(ndistinct)) _n
file close myfile

// Event study on the matched data; note the fixed effects are idname and pub_year x citation_year here.
eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing , fe(idname pub_year##citation_year) pretrends(4) horizons(0/4) autosample cluster(cl)
esttab using did_leads_and_lags_all_cit_matched.csv, se replace
eststo clear

// Pre vs. post estimate on the matched data, same fixed effects.
eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing, fe(idname pub_year##citation_year) autosample cluster(cl)
esttab using did_pre_post_all_cit_matched.csv, se replace
eststo clear

restore

/////////////////////////////////////
//2.10 Robustness: Truncation
// For each horizon h in {4, 6, 8, 10, 12}: keep incidents with outcome_year<=2021-h and
// rows at most h years after the outcome year.

// Descriptive statistics per horizon; the first horizon creates did_trunc.txt with
// its header, the others append.
local mode replace
local k = 0
foreach h in 4 6 8 10 12 {
    local ++k
    local lab : word `k' of four six eight ten twelve

    preserve
    keep if outcome_year<=2021-`h' & (citation_year-outcome_year)<=`h'

    file open myfile using "did_trunc.txt", write `mode'
    if "`mode'" == "replace" {
        file write myfile "spec,cit_mean,n_obs,d_articles,d_incidents"_n
    }
    local mode append
    summarize cit_count, detail
    file write myfile "trunc_`lab'_years," (r(mean)) ","
    distinct article_id_num
    file write myfile (r(N)) "," (r(ndistinct)) ","
    distinct incident_id_num
    file write myfile (r(ndistinct)) _n
    file close myfile
    restore
}

// Estimates per horizon, using the same restriction as an if-condition.
foreach h in 4 6 8 10 12 {
    eststo: quietly did_imputation arsinh_cit_count idname citation_year treatment_timing ///
        if outcome_year<=(2021-`h') & (citation_year-outcome_year)<=`h', ///
        fe(idname sample_id_num##citation_year) autosample cluster(cl)
}

esttab using did_trunc.csv, se replace
eststo clear


////////////////////////////////////////////
//3 - POISSON ANALYSIS

/////////////////
//3.1 Main estimate, citations by male first authors, citations by female first authors

//Main estimate: Poisson regression of the raw citation count on the post-treatment indicator
eststo: quietly ppmlhdfe cit_count treatment_post, ///
    absorb(idname sample_id_num##citation_year) cluster(cl)
file open myfile using "poisson_main.txt", write replace
file write myfile "spec,cit_mean,n_obs,d_articles,d_incidents"_n
summarize cit_count, detail
file write myfile "poisson_main," (r(mean)) ","
distinct article_id_num
file write myfile (r(N)) "," (r(ndistinct)) ","
distinct incident_id_num
file write myfile (r(ndistinct)) _n
file close myfile

//citations by male, then female, first authors
// Only the spec label and the mean are appended for these two rows; the file is
// re-opened after the first estimate and closed after the second.
foreach g in male female {
    eststo: quietly ppmlhdfe cit_count_`g'_first_author treatment_post, ///
        absorb(idname sample_id_num##citation_year) cluster(cl)
    if "`g'" == "male" {
        file open myfile using "poisson_main.txt", write append
    }
    summarize cit_count_`g'_first_author, detail
    file write myfile "poisson_`g'_cit," (r(mean)) _n
}
file close myfile

//output to file
esttab using poisson_main.csv, se replace
eststo clear
